Initial Data Exploration#
SQL join statement to fetch data from the database and into Pandas DataFrame.
{
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
}
{'kernelspec': {'display_name': 'Python 3',
'language': 'python',
'name': 'python3'},
'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
'file_extension': '.py',
'mimetype': 'text/x-python',
'name': 'python',
'nbconvert_exporter': 'python',
'pygments_lexer': 'ipython3',
'version': '3.8.0'}}
import pandas as pd
import sqlite3
from ydata_profiling import ProfileReport
# Connect to database and get complete dataset
conn = sqlite3.connect('mobile_phones.db')
query = """
SELECT
p.phone_id,
p.battery_power,
p.clock_speed,
p.m_dep,
p.mobile_wt,
p.n_cores,
p.ram,
p.talk_time,
p.price_range,
s.px_height,
s.px_width,
s.sc_h,
s.sc_w,
c.fc as front_camera,
c.pc as primary_camera,
f.blue,
f.dual_sim,
f.four_g,
f.three_g,
f.touch_screen,
f.wifi,
st.int_memory
FROM phones p
JOIN screen_specs s ON p.phone_id = s.phone_id
JOIN camera_specs c ON p.phone_id = c.phone_id
JOIN phone_features f ON p.phone_id = f.phone_id
JOIN storage_specs st ON p.phone_id = st.phone_id
"""
# Load data into DataFrame
df_db = pd.read_sql_query(query, conn)
conn.close()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import ydata_profiling
profile = ydata_profiling.ProfileReport(df_db, title="Pandas Profiling Report")
profile